{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "## 数据预处理"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "### 上机内容：\n",
    "1. 练习异常数据处理的函数\n",
    "2. 练习缺失值处理的函数\n",
    "3. 练习噪声数据处理的函数\n",
    "4. 练习数据集成的函数\n",
    "5. 练习数据变换处理的函数\n",
    "6. 练习归约相关函数。"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "markdown",
   "source": [
    "## 参考书的源码"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%% md\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [
    {
     "data": {
      "text/plain": "'C:\\\\Users\\\\Harri\\\\Pattern-recognition-and-data-mining\\\\chapter3'"
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# -*- coding: utf-8 -*-\n",
    "\"\"\"\n",
    "Created on Sat Oct 14 17:03:39 2017\n",
    "\n",
    "@author: wnma3\n",
    "\"\"\"\n",
    "\n",
    "import os\n",
    "import numpy as np\n",
    "\n",
    "import matplotlib.pyplot as plt\n",
    "import pandas as pd\n",
    "\"\"\"\n",
    "代码说明：\n",
    "programmer_1: 制作箱线图\n",
    "data.boxplot-->数据转为箱线图的字典格式\n",
    "plt.annotate-->绘图\n",
    "\n",
    "programmer_2: 计算数据\n",
    "range-->极差\n",
    "var-->方差\n",
    "dis-->四分距\n",
    "\n",
    "programmer_3: 画出盈利图（比例和数值）\n",
    "\n",
    "programmer_4: 计算成对相关性\n",
    "data.corr()-->dataframe中相互之间的相关性\n",
    "data.corr()[u'百合酱蒸凤爪'] -->dataframe某一项与其他项的相关性\n",
    "\"\"\"\n",
    "\n",
    "path = os.path.join(os.getcwd(),\"chapter3\")\n",
    "path"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [],
   "source": [
    "def programmer_1(file_name):\n",
    "    catering_sale = file_name\n",
    "    data = pd.read_excel(catering_sale, index_col=u'日期')\n",
    "    plt.figure()\n",
    "\n",
    "    # 画箱线图\n",
    "    p = data.boxplot(return_type='dict')\n",
    "    x = p['fliers'][0].get_xdata()\n",
    "    y = p['fliers'][0].get_ydata()\n",
    "    y = np.sort(y)\n",
    "\n",
    "    for i in range(len(x)):\n",
    "        # 处理临界情况， i=0时\n",
    "        temp = y[i] - y[i - 1] if i != 0 else -78 / 3\n",
    "        # 添加注释, xy指定标注数据，xytext指定标注的位置（所以需要特殊处理）\n",
    "        plt.annotate(\n",
    "            y[i], xy=(x[i], y[i]), xytext=(x[i] + 0.05 - 0.8 / temp, y[i]))\n",
    "    plt.show()\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "-1                 销量\n",
      "日期                \n",
      "2015-03-01    51.0\n",
      "2015-02-28  2618.2\n",
      "2015-02-27  2608.4\n",
      "2015-02-26  2651.9\n",
      "2015-02-25  3442.1\n",
      "...            ...\n",
      "2014-08-06  2915.8\n",
      "2014-08-05  2618.1\n",
      "2014-08-04  2993.0\n",
      "2014-08-03  3436.4\n",
      "2014-08-02  2261.7\n",
      "\n",
      "[201 rows x 1 columns]\n",
      "-2 [1. 1. 1. 1. 1. 1. 1. 1.]\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Miniconda3\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:240: RuntimeWarning: Glyph 38144 missing from current font.\n",
      "  font.set_text(s, 0.0, flags=flags)\n",
      "C:\\ProgramData\\Miniconda3\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:240: RuntimeWarning: Glyph 37327 missing from current font.\n",
      "  font.set_text(s, 0.0, flags=flags)\n",
      "C:\\ProgramData\\Miniconda3\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:203: RuntimeWarning: Glyph 38144 missing from current font.\n",
      "  font.set_text(s, 0, flags=flags)\n",
      "C:\\ProgramData\\Miniconda3\\lib\\site-packages\\matplotlib\\backends\\backend_agg.py:203: RuntimeWarning: Glyph 37327 missing from current font.\n",
      "  font.set_text(s, 0, flags=flags)\n"
     ]
    },
    {
     "data": {
      "text/plain": "<Figure size 432x288 with 1 Axes>",
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAX0AAAD4CAYAAAAAczaOAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjQuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/MnkTPAAAACXBIWXMAAAsTAAALEwEAmpwYAAAhPklEQVR4nO3df3gV1Z3H8fchgaBYCCpEN0lLIBBucsUo0dCnrpJGIiCLP4g0lLbUhgqUpqC19Uce6+pKxa62agRRG9zsapMqReFRECy5rOKCEEEtBAWRsiRYlASwhMaQ8N0/crkbJIEk5hfM5/U898nMmTNnztHwmZszc+84M0NERLyhW2d3QEREOo5CX0TEQxT6IiIeotAXEfEQhb6IiIeEd3YHTub888+3AQMGdHY3RBpVVVVFr169OrsbIid455139plZv8a2denQHzBgACUlJZ3dDZFGrV69mpEjR3Z2N0RO4Jzb1dQ2Te+IiHiIQl+kkz322GP4/X6SkpJ49NFHAXjxxRdJSkqiW7duJ/y1++CDDxIfH09CQgIrVqwIlR84cIDMzEyGDh2Kz+dj7dq1TR5zw4YNhIeHs2jRouPKP//8c2JiYvjpT3/adgOULqVLT++InOk2b97MM888w/r16+nRowejR49m3Lhx+P1+Fi9ezLRp046rX1paSlFREVu2bGHPnj1cffXVbNu2jbCwMGbNmsXo0aNZtGgRNTU1HD58uNFj1tXVcccdd5CRkXHCtnvuuYcrr7yyXcYqXYPe6Yt0oq1bt5KamsrZZ59NeHg4V111FYsXL8bn85GQkHBC/SVLlpCVlUVERARxcXHEx8ezfv16Dh48yBtvvEF2djYAPXr0IDIystFj5uXlMWHCBPr3739c+TvvvMPevXsbPRnImUOhL9KJ/H4/b775JhUVFRw+fJhly5axe/fuJuuXl5cTGxsbWo+JiaG8vJydO3fSr18/br75Zi655BKmTp1KVVVVo/u/9NJLzJgx47jyo0eP8vOf/5yHH3647QYnXZJCX6SFCgsL8fv9pKen4/f7KSwsbHVbPp8vNNUyevRokpOTCQsLa3E7tbW1bNy4kRkzZrBp0yZ69erF3LlzT6g3e/ZsHnroIbp1O/6f/vz58xk7diwxMTGtHoucHjSnL9IChYWF5Obmkp+fT11dHWFhYaEplUmTJrWqzezs7FAbd99990mDNzo6+ri/BMrKyoiOjiYmJoaYmBhSU1MByMzMbDT0S0pKyMrKAmDfvn0sW7aM8PBw1q5dy5tvvsn8+fM5dOgQNTU1nHPOOY22Iac5M+uyr+HDh5tIV5KUlGTFxcVmZhYIBMzMrLi42JKSklrd5t69e83MbNeuXZaQkGD79+8Pbbvqqqtsw4YNofXNmzfbsGHDrLq62j7++GOLi4uz2tpaMzO74oor7IMPPjAzs3vvvdduv/32kx53ypQp9uKLL55Q/uyzz9rMmTNbPR7pfECJNZGreqcv0gJbt27liiuuOK7siiuuYOvWra1uc8KECVRUVNC9e3fmzZtHZGQkL730Ejk5OXz22Wdce+21JCcns2LFCpKSkpg4cSKJiYmEh4czb9680HRQXl4ekydPpqamhoEDB/Lss88CsGDBAgCmT5/e6j7KmcNZF36ISkpKiukTudKV+P1+8vLySEtLC30iNxAIkJOTw+bNmzu7eyIAOOfeMbOUxrbpQq5IC+Tm5pKdnU0gEKC2tpZAIEB2dja5ubmd3TWRZtH0jkgLHLtYm5OTw9atW/H5fMyZM6fVF3FFOpqmd0RaSV+4Jl2VpndERARQ6IuIeIpCX0TEQxT6IiIeotAXEfEQhb6IiIco9EVEPEShLyLiIQp9EREPUeiLiHiIQl9ExEMU+iIiHqLQFxHxEIW+iIiHKPRFRDxEoS8i4iHNCn3n3K3OuS3Ouc3OuULnXE/nXJxz7m3n3EfOuT8653oE60YE1z8Kbh/QoJ27guUfOueuaacxiYhIE04Z+s65aOBnQIqZ+YEwIAt4CPidmcUD+4Hs4C7ZwP5g+e+C9XDOJQb3SwJGA/Odc2FtOxwRETmZ5k7vhANnOefCgbOBT4BvA4uC2wuA64PL1wXXCW5Pd865YHmRmX1hZjuBj4DLv/IIRESk2U75YHQzK3fOPQz8L/APYCXwDnDAzGqD1cqA6OByNLA7uG+tc+4gcF6wfF2DphvuE+KcuwW4BSAqKorVq1e3fFQiHeDQoUP6/ZTTzilD3znXl/p36XHAAeBF6qdn2oWZPQ08DfUPRteDp6Wr0oPR5XTUnOmdq4GdZvaZmR0BFgPfAiKD0z0AMUB5cLkciAUIbu8DVDQsb2QfERHpAM0J/f8FRjjnzg7OzacDpUAAyAzWmQIsCS4vDa4T3F5sZhYszwre3RMHDAbWt80wRESkOZozp/+2c24RsBGoBTZRP/3yKlDknHsgWJYf3CUf+C/n3EdAJfV37GBmW5xzL1B/wqgFZppZXRuPR0RETuKUoQ9gZvcC936p+GMaufvGzKqBm5poZw4wp4V9FBGRNqJP5IqIeIhCX0TEQxT6IiIeotAXEfEQhb6IiIco9EVEPEShLyLiIQp9EREPUeiLiHiIQl9ExEMU+iIiHqLQFxHxEIW+iIiHKPRFRDxEoS8i4iEKfRERD1Hoi4h4iEJfpJMdOHCAzMxMhg4dis/nY+3atQDk5eUxdOhQkpKS+OUvfxmq/+CDDxIfH09CQgIrVqwA4MMPPyQ5OTn06t27N48++miTx9ywYQPh4eEsWrSoXccmXU+zHpcoIu1n1qxZjB49mkWLFlFTU8Phw4cJBAIsWbKE9957j4iICD799FMASktLKSoqYsuWLezZs4err76abdu2kZCQwLvvvgtAXV0d0dHR3HDDDY0er66ujjvuuIOMjIyOGqJ0IXqnL9KJDh48yBtvvEF2djYAPXr0IDIykieffJI777yTiIgIAPr37w/AkiVLyMrKIiIigri4OOLj41m/fv1xba5atYpBgwbxjW98o9Fj5uXlMWHChFCb4i0KfZFOtHPnTvr168fNN9/MJZdcwtSpU6mqqmLbtm28+eabpKamctVVV7FhwwYAysvLiY2NDe0fExNDeXn5cW0WFRUxadKkRo9XXl7OSy+9xIwZM9pvUNKlKfRFOlFtbS0bN25kxowZbNq0iV69ejF37lxqa2uprKxk3bp1/Pu//zsTJ07EzE7ZXk1NDUuXLuWmm25qdPvs2bN56KGH6NZN//S9Sv/nRVqosLAQv99Peno6fr+fwsLCVrcVExNDTEwMqampAGRmZrJx40ZiYmK48cYbcc5x+eWX061bN/bt20d0dDS7d+8O7V9WVkZ0dHRoffny5Vx66aVERUU1erySkhKysrIYMGAAixYt4ic/+Qkvv/xyq/svpx9dyBVpgcLCQnJzc8nPz6euro6wsLDQfHxTUyonc8EFFxAbG8uHH35IQkICq1atIjExkUGDBhEIBEhLS2Pbtm3U1NRw/vnnM378eL773e9y2223sWfPHrZv387ll19+XP9O1o+dO3eGln/4wx8ybtw4rr/++hb3W05fCn2RFpgzZw75+fmkpaWxevVqRo4cSX5+Pjk5Oa0Kfai/sDp58mRqamoYOHAgzz77LL169eJHP/oRfr+fHj16UFBQgHOOpKQkJk6cSGJiIuHh4cybN4+wsDAAqqqqeP3113nqqaeOa3/BggUATJ8+/asNXs4IrjnzhJ0lJSXFSkpKOrsbIiFhYWFUV1fTvXv3UOgfOXKEnj17UldX19ndEwHAOfeOmaU0tk1z+iIt4PP5WLNmzXFla9aswefzdVKPRFpGoS/SArm5uWRnZxMIBKitrSUQCJCdnU1ubm5nd02kWTSnL9ICx+btc3Jy2Lp1Kz6fjzlz5rR6Pl+ko2lOX6SVjs3pi3Q1mtMXERFAoS8i4ikKfRERD1Hoi4h4iEJfRMRDmhX6zrlI59wi59wHzrmtzrlvOufOdc697pzbHvzZN1jXOeced8595Jx73zl3aYN2pgTrb3fOTWmvQYmISOOa+07/MeA1MxsKXAxsBe4EVpnZYGBVcB1gDDA4+LoFeBLAOXcucC+QClwO3HvsRCEiIh3jlKHvnOsDXAnkA5hZjZkdAK4DCoLVCoDrg8vXAf9p9dYBkc65C4FrgNfNrNLM9gOvA6PbcCwiInIKzflEbhzwGfCsc+5i4B1gFhBlZp8E6/wNOPYF3tHA7gb7lwXLmio/jnPuFur/QiAqKorVq1c3dywiHerQoUP6/ZTTTnNCPxy4FMgxs7edc4/x/1M5AJiZOefa5KO9ZvY08DTUfyJXn3iUrkqfyJXTUXPm9MuAMjN7O7i+iPqTwN7gtA3Bn58Gt5cDsQ32jwmWNVUuIiId5JShb2Z/A3Y75xKCRelAKbAUOHYHzhRgSXB5KfCD4F08I4CDwWmgFUCGc65v8AJuRrBMREQ6SHO/ZTMHeN451wP4GLiZ+hPGC865bGAXMDFYdxkwFvgIOBysi5lVOuf+DdgQrHe/mVW2yShERKRZmhX6ZvYu0Ng3tqU3UteAmU20sxBY2IL+iYhIG9InckVEPEShLyLiIQp9EREPUeiLiHiIQl9ExEMU+iIiHqLQFxHxEIW+iIiHKPRFRDxEoS8i4iEKfRERD1Hoi4h4iEJfRMRDFPoiIh6i0BcR8RCFvoiIhyj0RUQ8RKEvIuIhCn0REQ9R6IuIeIhCX0TEQxT6IiIeotAXEfEQhb6IiIco9EVEPEShLyLiIQp9EREPUeiLiHiIQl9ExEMU+nLaqKur45JLLmHcuHEA7Ny5k9TUVOLj4/nOd75DTU1NqO4LL7xAYmIiSUlJfPe73w2Vh4WFkZycTHJyMuPHj2/0OL/97W9JTExk2LBhpKens2vXrvYdGG0ztoKCAgYPHszgwYMpKCho9Dj33HMPw4YNIzk5mYyMDPbs2dO+A5Oux8y67Gv48OEmcswjjzxikyZNsmuvvdbMzG666SYrLCw0M7Np06bZ/Pnzzcxs27ZtlpycbJWVlWZmtnfv3lAbvXr1OuVxiouLraqqyszM5s+fbxMnTmy0XiAQaPVYvuyrjq2iosLi4uKsoqLCKisrLS4uLlSnoYMHD4aWH3vsMZs2bVqbjUG6DqDEmshVvdOX00JZWRmvvvoqU6dOBerfrBQXF5OZmQnAlClTePnllwF45plnmDlzJn379gWgf//+LTpWWloaZ599NgAjRoygrKysjUbRuLYY24oVKxg1ahTnnnsuffv2ZdSoUbz22msnHKt3796h5aqqKpxz7Tk06YIU+nJamD17Nr/5zW/o1q3+V7aiooLIyEjCw8MBiImJoby8HIBt27axbds2vvWtbzFixIjjwq+6upqUlBRGjBgRCtKTyc/PZ8yYMW0/oAbaYmzl5eXExsaG2my4z5fl5uYSGxvL888/z/3339+eQ5MuSKEvXd4rr7xC//79GT58eLPq19bWsn37dlavXk1hYSE//vGPOXDgAAC7du2ipKSEP/zhD8yePZsdO3Y02c5zzz1HSUkJv/jFL44rz8nJoWfPnqSlpdGzZ09ycnK6xNiaa86cOezevZvJkyfzxBNPtKLXcjprdug758Kcc5ucc68E1+Occ2875z5yzv3ROdcjWB4RXP8ouH1AgzbuCpZ/6Jy7ps1HI2ekt956i6VLlzJgwACysrIoLi5m1qxZHDhwgNraWqB+iiQ6Ohqof5c7fvx4unfvTlxcHEOGDGH79u0AoToDBw5k5MiRbNq0qdFj/vnPf2bOnDksXbqUiIiIUHlOTg4LFizg17/+NcuXL+fXv/41CxYsaHXwt9XYoqOj2b17d6jdhvs0ZfLkyfzpT39qVb/lNNbUZP+XX8BtwB+AV4LrLwBZweUFwIzg8k+ABcHlLOCPweVE4D0gAogDdgBhJzumLuTKlwUCgdDFzszMzOMuds6bN8/MzJYvX24/+MEPzMzss88+s5iYGNu3b59VVlZadXV1qDw+Pt62bNlywjE2btxoAwcOtG3btp2wLSIiwh555JFQX8zqL8JGRER06tgqKipswIABVllZaZWVlTZgwACrqKg44RgNx/T444/bhAkTvnK/pevhJBdymxv4McAq4NvAK4AD9gHhwe3fBFYEl1cA3wwuhwfrOeAu4K4GbYbqNfVS6MuXNQzGHTt22GWXXWaDBg2yzMzMUKAfPXrUbr31VvP5fOb3+0Ph+dZbb5nf77dhw4aZ3++33//+96F277nnHluyZImZmaWnp1v//v3t4osvtosvvtj+5V/+JVQPCN3Zcyz0q6qqrP79U+eNzcwsPz/fBg0aZIMGDbKFCxeGyrOzs23Dhg1mZnbjjTdaUlKSXXTRRTZu3DgrKyv7yv2Wrudkoe/qt5+cc24R8CDwNeB24IfAOjOLD26PBZabmd85txkYbWZlwW07gFTgX4P7PBcszw/us+hLx7oFuAUgKipqeFFR0Sn7J9JRMjIymDp1KhMnTuTQoUOcc845vPDCC/z+979n5cqVnd09EQDS0tLeMbOUxraFn2pn59w44FMze8c5N7KN+3YCM3saeBogJSXFRo5s90OKNNu0adNYsGAB8fHxJCYmsnHjRp555hmmT5+OflfldHDK0Ae+BYx3zo0FegK9gceASOdcuJnVUj/9c+z+sHIgFihzzoUDfYCKBuXHNNxH5LSQl5cHwN13380XX3xBREQE06dPD5WLdHWnvHvHzO4ysxgzG0D9hdliM5sMBIDMYLUpwJLg8tLgOsHtxcE5pqVAVvDunjhgMLC+zUYi0kHy8vKorq4mEAhQXV2twJfTSnPe6TflDqDIOfcAsAnID5bnA//lnPsIqKT+RIGZbXHOvQCUArXATDOr+wrHFxGRFmrWhdzOkpKSYiUlJZ3dDfGAjvo6gq78703OHM65Ji/k6hO5IrTuiwe/cccrLd5HpLMp9EVEPEShLyLiIQp9EREPUeiLiHiIQl9ExEMU+iIiHqLQFxHxEIW+iIiHKPRFRDxEoS8i4iEKfRERD1Hoi4h4iEJfRMRDFPoiIh6i0BcR8ZCv8uQskS7r4vtWcvAfR9r9OAPufLVd2+9zVnfeuzejXY8h3qLQlzPSwX8c4a9zr23XY6xevZqRI0e26zHa+6Qi3qPpHRERD1Hoi4h4iEJfRMRDFPoiIh6i0BcR8RCFvoiIh+iWTTkjfc13JxcV3Nn+Bypo3+a/5gNo31tPxVsU+nJG+vvWubpPX6QRmt4REfEQhb6IiIdoekfOWB0yNfJa+3/3jkhbUujLGam95/Oh/qTSEccRaUua3hER8RCFvoiIhyj0RUQ8RKEvIuIhCn0REQ85Zeg752KdcwHnXKlzbotzblaw/Fzn3OvOue3Bn32D5c4597hz7iPn3PvOuUsbtDUlWH+7c25K+w1LREQa05x3+rXAz80sERgBzHTOJQJ3AqvMbDCwKrgOMAYYHHzdAjwJ9ScJ4F4gFbgcuPfYiUJERDrGKUPfzD4xs43B5b8DW4Fo4Dr+/+umCoDrg8vXAf9p9dYBkc65C4FrgNfNrNLM9gOvA6PbcjAiInJyLfpwlnNuAHAJ8DYQZWafBDf9DYgKLkcDuxvsVhYsa6r8y8e4hfq/EIiKimL16tUt6aJIh9Lvp5xumh36zrlzgD8Bs83sc+dcaJuZmXPO2qJDZvY08DRASkqKtfe3GIoANPx9bom0h1pW36xN/pmItFqz7t5xznWnPvCfN7PFweK9wWkbgj8/DZaXA7ENdo8JljVVLtLpzKzFr0Ag0OJ9RDpbc+7ecUA+sNXMfttg01Lg2B04U4AlDcp/ELyLZwRwMDgNtALIcM71DV7AzQiWiYhIB2nO9M63gO8Df3HOvRssuxuYC7zgnMsGdgETg9uWAWOBj4DDwM0AZlbpnPs3YEOw3v1mVtkWgxARkeY5Zeib2RqgqQnP9EbqGzCzibYWAgtb0kEREWk7+kSuiIiHKPRFRDxEoS8i4iEKfRERD1Hoi4h4iEJfRMRDFPoiIh6i0BcR8RCFvoiIhyj0RUQ8RKEvIuIhCn2RTva73/2OpKQk/H4/kyZNorq6GjMjNzeXIUOG4PP5ePzxx4H6h7b06dOH5ORkkpOTuf/++xttc+fOnaSmphIfH893vvMdampqOnJI0oW16MlZItK2ysvLefzxxyktLeWss85i4sSJFBUVYWbs3r2bDz74gG7duvHpp5+G9vnnf/5nXnnllZO2e8cdd3DrrbeSlZXF9OnTyc/PZ8aMGe09HDkN6J2+SCerra3lH//4B7W1tRw+fJh/+qd/4sknn+RXv/oV3brV/xPt379/s9szM4qLi8nMzARgypQpvPzyy+3RdTkNKfRFOlF0dDS33347X//617nwwgvp06cPGRkZ7Nixgz/+8Y+kpKQwZswYtm/fHtpn7dq1XHzxxYwZM4YtW7ac0GZFRQWRkZGEh9f/IR8TE0N5uR5SJ/UU+iKdaP/+/SxZsoSdO3eyZ88eqqqqeO655/jiiy/o2bMnJSUl/PjHP+ZHP/oRAJdeeim7du3ivffeIycnh+uvv75zByCnHYW+SAsVFhbi9/tJT0/H7/dTWFjY6rb+/Oc/ExcXR79+/ejevTs33ngj//M//0NMTAw33ngjADfccAPvv/8+AL179+acc84BYOzYsRw5coR9+/Yd1+Z5553HgQMHqK2tBaCsrIzo6OhW91HOLLqQK9IChYWF5Obmkp+fT11dHWFhYWRnZwMwadKkFrf39a9/nXXr1nH48GHOOussVq1aRUpKCr179yYQCBAXF8d///d/M2TIEAD+9re/ERUVhXOO9evXc/ToUc4777zj2nTOkZaWxqJFi8jKyqKgoIDrrrvuqw9ezgxm1mVfw4cPN5GuJCkpyYqLi83MLBAImJlZcXGxJSUltbrNX/3qV5aQkGBJSUn2ve99z6qrq23//v02duxY8/v9NmLECHv33XfNzCwvL88SExNt2LBhlpqaam+99VaonTFjxlh5ebmZme3YscMuu+wyGzRokGVmZlp1dXWr+yenH6DEmshVV7+9a0pJSbGSkpLO7oZISFhYGNXV1XTv3p3Vq1czcuRIjhw5Qs+ePamrq+vs7okA4Jx7x8xSGtumOX2RFvD5fKxZs+a4sjVr1uDz+TqpRyIto9AXaYHc3Fyys7MJBALU1tYSCATIzs4mNze3s7sm0iy6kCvSAscu1ubk5LB161Z8Ph9z5sxp1UVckc6gOX2RVjo2py/S1WhOX0REAIW+iIinKPRFRDxEoS8i4iEKfRERD1HoiwQdOHCAzMxMhg4dis/nY+3atVRWVjJq1CgGDx7MqFGj2L9/f6P7FhQUMHjwYAYPHkxBQUGofMCAAVx00UUkJyeTklJ/M8WLL75IUlIS3bp142R3p7322mskJCQQHx/P3Llz23aw4lkKfZGgWbNmMXr0aD744APee+89fD4fc+fOJT09ne3bt5Oent5o+FZWVnLffffx9ttvs379eu67777jTg6BQIB33303FPB+v5/Fixdz5ZVXNtmXuro6Zs6cyfLlyyktLaWwsJBVq1aRlpZGYmIiSUlJPPbYYwD84he/YOjQoQwbNowbbriBAwcONNqmTiICCn0RAA4ePMgbb7wR+sbMHj16EBkZyZIlS5gyZQrQ9BOoVqxYwahRozj33HPp27cvo0aN4rXXXmvyWD6fj4SEhJP2Z/369cTHxzNw4EB69OhBVlYWq1at4pFHHqG0tJR169Yxb948SktLGTVqFJs3b+b9999nyJAhPPjggye019hJpLS0tAX/heRModAXof5B4v369ePmm2/mkksuYerUqVRVVbF3714uvPBCAC644AL27t17wr7l5eXExsaG1hs+qco5R0ZGBsOHD+fpp59udn8aa/Pzzz/n0ksvBeBrX/saPp+P8vJyMjIyQk/JGjFiBGVlZSe019hJZMmSJc3uj5w5FPoi1D+nduPGjcyYMYNNmzbRq1evE6ZAnHM458jJyaFnz56kpaXRs2dPXnrppSbbXbNmDRs3bmT58uXMmzePN954o036+9e//pVNmzaRmpp6XPnChQsZM2bMCfVPdmISb+nw0HfOjXbOfeic+8g5d2dHH1+kMTExMcTExIRCNDMzk40bNxIVFcUnn3wCEPo5f/58IiMjcc4RGRnJ2rVrKSoqCrXV8ElVx37279+fG264gfXr1zerP9HR0ezevbvRNg8dOsSECRN49NFH6d27d6jOnDlzCA8PZ/Lkya39zyAe0KGh75wLA+YBY4BEYJJzLrEj+yDSmAsuuIDY2Fg+/PBDAFatWkViYiLjx48P3Y1TUFDAwYMHiYyMpLCwkJUrV1JYWEifPn3YsmUL+/fvZ//+/axcuZJrrrmGqqoq/v73vwNQVVXFypUr8fv9zerPZZddxvbt29m5cyc1NTUUFRUxfvx4jhw5woQJE5g8eXLocYoA//Ef/8Err7zC888/j3PuhPZOdhIRj2nq6Srt8QK+CaxosH4XcFdT9fXkLOlImzZtsuHDh9tFF11k1113nVVWVtq+ffvs29/+tsXHx1t6eroBtmzZMtuwYYONHTvWzMyWLVtmgA0aNMgGDRpkCxcuNLP6p1cNGzbMhg0bZomJifbAAw+YmdnixYstOjraevToYf3797eMjAwzMysvL7cxY8aE+vPqq6/a4MGDbeDAgfbAAw/Y0aNH7fvf/77NmjXruH4vX77cfD6fffrpp02O7ciRIxYXF2cff/yxffHFFzZs2DDbvHlzW/7nky6ErvLkLOdcJjDazKYG178PpJrZTxvUuQW4BSAqKmp4wz+bRTpbWloa06ZNIysri0OHDnHOOedQVFTEU089RSAQaNdj/+Uvf+FnP/sZAwcODL2bnzp1Knl5eRw5ciQ01ZOYmMhtt93Gvn37ePjhh0PXJo7d8XP06FHGjBnD9773vXbtr3SetLS0Jr9ls8uFfkP6amXpas477zwOHjzIb37zGxITEyktLeWXv/wlffr0oaKiorO7JwKc/KuVO/ohKuVAbIP1mGCZyGnhiSeeYNq0adx5550cOXKE7t27c/bZZ/PEE090dtdEmqWj797ZAAx2zsU553oAWcDSDu6DSKtNmjSJp556iiFDhtCtWzeGDBnCU089pSdnyWmjw5+c5ZwbCzwKhAELzWxOU3U1vSNdmZ6cJV1VV5rewcyWAcs6+rgiIqJP5IqIeIpCX0TEQxT6IiIeotAXEfGQDr97pyWcc58Buzq7HyJNOB/Y19mdEGnEN8ysX2MbunToi3RlzrmSpm6LE+mqNL0jIuIhCn0REQ9R6Iu0XvOffyjSRWhOX0TEQ/ROX0TEQxT6IiIeotAXEfGQDv+WTZHTjXPuX4ERQG2wKBxY11iZmf1rR/dPpCUU+iLNk2VmBwCcc5HA7CbKRLo0Te+IiHiIQl9ExEMU+iIiHqLQFxHxEIW+iIiHKPRFRDxEt2yKnNqnwH86544G17sBrzVRJtKl6QvXREQ8RNM7IiIeotAXEfEQhb6IiIco9EVEPEShLyLiIf8He7TFrdNFojUAAAAASUVORK5CYII=\n"
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "programmer_1(path + '/catering_sale.xls')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "\n",
    "def programmer_2(file_name):\n",
    "    catering_sale = file_name\n",
    "    data = pd.read_excel(catering_sale, index_col=u'日期')\n",
    "\n",
    "    data = data[(data[u'销量'] > 400) & data[u'销量'] < 5000]\n",
    "    statistics = data.describe()[u'销量']\n",
    "\n",
    "    statistics['range'] = statistics['max'] - statistics['min']\n",
    "    statistics['var'] = statistics['std'] / statistics['mean']\n",
    "    statistics['dis'] = statistics['75%'] - statistics['25%']\n",
    "\n",
    "    print(statistics)\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# programmer_2(path + '/data/catering_sale.xls')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "def programmer_3(file_name):\n",
    "    dish_profit = file_name  #餐饮菜品盈利数据\n",
    "    data = pd.read_excel(dish_profit, index_col=u'菜品名')\n",
    "    data = data[u'盈利'].copy()\n",
    "    data.sort_values(ascending=False)\n",
    "\n",
    "    plt.figure()\n",
    "    data.plot(kind='bar')\n",
    "    plt.ylabel(u'盈利（元）')\n",
    "    p = 1.0 * data.cumsum() / data.sum()\n",
    "    p.plot(color='r', secondary_y=True, style='-o', linewidth=2)\n",
    "    plt.annotate(\n",
    "        format(p[6], '.4%'),\n",
    "        xy=(6, p[6]),\n",
    "        xytext=(6 * 0.9, p[6] * 0.9),\n",
    "        arrowprops=dict(arrowstyle=\"->\", connectionstyle=\"arc3,rad=.2\"))\n",
    "    plt.ylabel(u'盈利（比例）')\n",
    "    plt.show()\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "# programmer_3(path + '/data/catering_dish_profit.xls')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "def programmer_4(file_name):\n",
    "    catering_sale = file_name\n",
    "    data = pd.read_excel(catering_sale, index_col=u'日期')\n",
    "\n",
    "    data.corr()\n",
    "    data.corr()[u'百合酱蒸凤爪']\n",
    "    data[u'百合酱蒸凤爪'].corr(data[u'翡翠蒸香茜饺'])\n"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "outputs": [],
   "source": [
    "\n",
    "# programmer_4(path + '/data/catering_sale_all.xls')"
   ],
   "metadata": {
    "collapsed": false,
    "pycharm": {
     "name": "#%%\n"
    }
   }
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}